######################
#era5_working
#paul stainier
#started 2/3/2020
#create the weather bins for each ERA5 gridpoint 
#merge with Indian district map
#####################


rm(list = ls())
library(dplyr)
options(dplyr.summarise.inform = FALSE)
library(lubridate)
library(sp)
library(sf)
library(ggplot2)
library(sqldf)
library(RColorBrewer)
library(ggmap)
library(raster)
library(geosphere)
library(forestmangr)
library(purrr)
library(reshape)
library(reshape2)
library(collapse)
library(readr)
library(data.table)

#packages that were used for crosswalk but don't exist for this version of R
#library(maptools)
#library(rgeos)


#########################
#toggle T/F for the part that 
#you want to run 
#########################
crosswalk_spatial <- T
crosswalk_spatial_61 <- T
#nss weather
tmax_bins_and_districts <- T
prec_districts <- T
tmax_variables<- T
prec_totals <- T
prec_variables <-T
#nfhs weather
tmax_bins_and_districts_nfhs <- T
prec_districts_nfhs <- T
tmax_variables_nfhs<- T
prec_totals_nfhs <- T
prec_variables_nfhs <-T

#icrisat weather
tmax_bins_and_districts_icrisat <- T
prec_districts_icrisat <- T
tmax_variables_icrisat<- T
prec_totals_icrisat <- T
prec_variables_icrisat <-T


#nss weather extension 2015-2024
tmax_bins_and_districts_2014_2024 <- T
tmax_variables_2015_2024<- T

#########################
#replace with own directories before running
#########################

input_directory <- 
output_directory <- 
crosswalk_directory <- 



######################
#crosswalk_spatial
#create the spatial crosswalk 
#between era5 points and indian districts
#match with distid3
#####################
if(crosswalk_spatial){
  #load the era5 grid cell and relevant district code latitude and longitude data
  setwd(crosswalk_directory)
  era5_latlon <- read.csv("era5_latlon.csv")
  era5_latlon$names <- 1:length(era5_latlon$lat)
  centroid_data <- read.csv("district_centroids.csv")
  centroid_data$district_num <- 1:length(centroid_data$State)
  
  #download the district code shape files
  india_shapes <- readRDS("IND_adm2.rds")
  
  #list of districts
  #make sure to use ID_2 and not OBJECTID: there is a repeated ID_2 (364 - Palghar)
  #so the numbers for the OBJECTID are off after 364
  district_list <- india_shapes@data[["ID_2"]]
  district_list <- district_list[!duplicated(district_list)]
  
  #create a data frame that will be used to store the crosswalk
  crosswalk <- as.data.frame(matrix(NA, 1, 3))
  colnames(crosswalk) <- c("lat", "lon", "district_map_id")
  
  district_names <- as.data.frame(matrix(NA, 1, 3))
  colnames(district_names) <- c("district_name", "state_name", "district_map_id")
  
  #initiate a list of districts that are small enough not to contain
  small_districts <- 27
  
  #create a data frame with the relevant era5 data
  era5_df <- data.frame(Longitude = era5_latlon$lon,
                        Latitude = era5_latlon$lat,
                        names = 1:length(era5_latlon$lon))
  
  #assign it coordinates to get it ready for the comparison with the district  shape files
  coordinates(era5_df) <- ~ Longitude + Latitude
  proj4string(era5_df) <- CRS("+proj=longlat")
  
  centroid_df <- data.frame(Longitude = centroid_data$Longitude,
                            Latitude = centroid_data$Latitude,
                            names = centroid_data$district_num)
  coordinates(centroid_df) <- ~ Longitude + Latitude
  proj4string(centroid_df) <- CRS("+proj=longlat")
  
  
  #figure out which era5 gridpoints have centroids within a particular district
  #loop through all relevant districts 
  for(district in district_list){
    
    #get the shape file of the district
    district_shape <- india_shapes[which(india_shapes$ID_2 == district),]
    
    #assign the same coordinate reference system to the district shapefile
    #as to the era5 dataframe
    proj4string(district_shape) <- proj4string(era5_df)
    
    #match era5 grid point centroids to districts that contain them
    era5_matches <- over(district_shape, era5_df, returnList = TRUE)
    centroid_matches <- over(district_shape, centroid_df, returnList = TRUE)
    
    
    #add the matches to the crosswalk file
    #if there is no match, add this district to a list of small districts
    if(length(era5_matches[[names(era5_matches)]][["names"]]) > 0 ){
      district_rows <- as.data.frame(era5_latlon$lat[which((era5_latlon$names) %in% era5_matches[[names(era5_matches)]][["names"]])])
      colnames(district_rows)[1] <- "lat"
      district_rows$lon <- era5_latlon$lon[which((era5_latlon$names) %in% era5_matches[[names(era5_matches)]][["names"]])]
      district_rows$district_map_id <- district
      crosswalk<-rbind(crosswalk, district_rows)
    }else{
      small_districts <- c(small_districts, district)
    }
  }
  crosswalk <- crosswalk[!duplicated(crosswalk),]
  
  calculated_centroids <- as.data.frame(getSpPPolygonsLabptSlots(india_shapes))
  colnames(calculated_centroids)[1] <- "lon"
  colnames(calculated_centroids)[2] <- "lat"
  calculated_centroids$district_map_id <- 1:length(calculated_centroids$lat)
  
  #set up a data frame for the crosswalk for small districts
  smallcrosswalk <- as.data.frame(matrix(NA, 1, 5))
  colnames(smallcrosswalk)[1] <- "lat"
  colnames(smallcrosswalk)[2] <- "lon"
  colnames(smallcrosswalk)[3] <- "district_map_id"
  colnames(smallcrosswalk)[4] <- "distances"
  colnames(smallcrosswalk)[5] <- "closest"
  
  #get the small districts centroids
  small_district_centroids <- calculated_centroids[which(calculated_centroids$district_map_id %in% small_districts),]
  
  #set a threshold of 15 miles for the largest distance between a district centroid and an ERA5 grid point
  threshold <- 15
  
  #add the era5 grid points which are close enough to each district to the crosswalk, for now
  for(district_index in 1:length(small_district_centroids$district_map_id)){
    for (era5_index in 1:length(era5_latlon$lon)){
      distance <- distHaversine(c(small_district_centroids$lon[district_index], small_district_centroids$lat[district_index]), c(era5_latlon$lon[era5_index], era5_latlon$lat[era5_index]), r=6378137)
      
      if (distance*0.000621371 < threshold){ #meters to miles
        new_row <- c(era5_latlon$lat[era5_index], era5_latlon$lon[era5_index], small_district_centroids$district_map_id[district_index], distance*0.000621371, 0)
        smallcrosswalk <- rbind(smallcrosswalk, new_row)
      }
    }
    
    #assign a value of 1 to a column named "closest" for the
    #closest era5 gridpoint to each district
    district_rows <- which(smallcrosswalk$district_map_id==small_district_centroids$district_map_id[district_index])
    min_district <- which.min(smallcrosswalk$distances[district_rows])
    smallcrosswalk$closest[district_rows[min_district]] <- 1
  }
  
  #only keep the closest era5 grid point to the district centroid
  #these districts are small - only one is necessary
  smallcrosswalk_mindist <- smallcrosswalk[which(smallcrosswalk$closest == 1),]
  
  #join the crosswalk for small districts to the one for big districts
  crosswalk <- rbind(crosswalk, smallcrosswalk_mindist[,1:3])
  
  crosswalk <- crosswalk[complete.cases(crosswalk),]
  
  district_state_names <- as.data.frame(cbind(india_shapes$ID_2, india_shapes$NAME_1, india_shapes$NAME_2))
  district_state_names <- district_state_names[!duplicated(district_state_names),]
  names(district_state_names) <- c("district_map_id", "state_name", "district_name")
  
  
  crosswalk <- merge(crosswalk, district_state_names, by = "district_map_id")
  write.csv(crosswalk, "spatial_crosswalk_era5.csv", row.names = FALSE)
  
  
  mapdistrict_distid3_crosswalk <- read.csv("mapdistrict_distid3_crosswalk.csv")
  mapdistrict_distid3_crosswalk <- dplyr::select(mapdistrict_distid3_crosswalk, district_map_id, distid3)
  crosswalk_nss <- merge(crosswalk, mapdistrict_distid3_crosswalk, by = "district_map_id")
  
  #manually add rows for Delhi because the shapefile only has one for Delhi
  #assign the closest point to each Delhi district
  crosswalk_nss <- crosswalk_nss[-which(crosswalk_nss$district_map_id == 415),] 
  delhi_rows <- read.csv("delhi_era5_crosswalk.csv")
  crosswalk_nss <- rbind(crosswalk_nss, delhi_rows)
  
  #save the crosswalk file
  write.csv(crosswalk_nss, "era5_distid3_spatial_crosswalk.csv", row.names=FALSE)
}

if(crosswalk_spatial_61){
  #load the era5 grid cell and relevant district code latitude and longitude data
  setwd(crosswalk_directory)
  era5_latlon <- read.csv("era5_latlon.csv")
  era5_latlon$names <- 1:length(era5_latlon$lat)
 
  #download the district code shape files
  setwd("/Users/paulstainier/Dropbox/1_rii/2_working/input/crosswalk/district61")
  india_shapes61 <- read_sf(dsn = ".", layer = "district61")
  india_shapes61 <- as_Spatial(india_shapes61)
  
  #save the district names to merge with icrisat names
  district_names <- data.frame(district_name = india_shapes61$NAME, 
                               state_name = india_shapes61$STATE_UT,
                               district_map_id = india_shapes61$DIST61_ID)

  district_names <- unique(district_names)
  write.csv(district_names, "india_districts_shapefile61.csv", row.names = F)
  
  #list of districts
  district_list <- india_shapes61$DIST61_ID
  district_list <- unique(district_list)
  
  #create a data frame that will be used to store the crosswalk
  crosswalk <- as.data.frame(matrix(NA, 1, 3))
  colnames(crosswalk) <- c("lat", "lon", "district_map_id")
  
  
  #initiate a list of districts that are small enough not to contain
  small_districts <- NA
  
  #create a data frame with the relevant era5 data
  era5_df <- data.frame(Longitude = era5_latlon$lon,
                        Latitude = era5_latlon$lat,
                        names = 1:length(era5_latlon$lon))
  
  #assign it coordinates to get it ready for the comparison with the district  shape files
  coordinates(era5_df) <- ~ Longitude + Latitude
  proj4string(era5_df) <- CRS("+proj=longlat")
  
  
  #figure out which era5 gridpoints have centroids within a particular district
  #loop through all relevant districts 
  dist_index <- 1
  for(district in district_list){
    
    #get the shape file of the district
    district_shape <- india_shapes61[which(india_shapes61$DIST61_ID == district),]
    if(length(district_shape) >1){
      district_shape <- district_shape[1,]
    }
    
    #assign the same coordinate reference system to the district shapefile
    #as to the era5 dataframe
    proj4string(district_shape) <- proj4string(era5_df)
    
    #match era5 grid point centroids to districts that contain them
    era5_matches <- over(district_shape, era5_df, returnList = TRUE)
    
    #add the matches to the crosswalk file
    #if there is no match, add this district to a list of small districts
    if(length(era5_matches[[names(era5_matches)]][["names"]]) > 0 ){
      district_rows <- as.data.frame(era5_latlon$lat[which((era5_latlon$names) %in% era5_matches[[names(era5_matches)]][["names"]])])
      colnames(district_rows)[1] <- "lat"
      district_rows$lon <- era5_latlon$lon[which((era5_latlon$names) %in% era5_matches[[names(era5_matches)]][["names"]])]
      district_rows$district_map_id <- district
      crosswalk<-rbind(crosswalk, district_rows)
    }else{
      small_districts <- c(small_districts, dist_index)
    }
    dist_index <- dist_index + 1
  }
  
  small_districts <- small_districts[!is.na(small_districts)]
  calculated_centroids <- as.data.frame(getSpPPolygonsLabptSlots(india_shapes61))
  colnames(calculated_centroids)[1] <- "lon"
  colnames(calculated_centroids)[2] <- "lat"
  calculated_centroids$district_map_index <- 1:length(calculated_centroids$lat)
  
  #set up a data frame for the crosswalk for small districts
  smallcrosswalk <- as.data.frame(matrix(NA, 1, 5))
  colnames(smallcrosswalk)[1] <- "lat"
  colnames(smallcrosswalk)[2] <- "lon"
  colnames(smallcrosswalk)[3] <- "district_map_index"
  colnames(smallcrosswalk)[4] <- "distances"
  colnames(smallcrosswalk)[5] <- "closest"
  
  #get the small districts centroids
  small_district_centroids <- calculated_centroids[which(calculated_centroids$district_map_index %in% small_districts),]
  
  #set a threshold of 15 miles for the largest distance between a district centroid and an ERA5 grid point
  threshold <- 15
  
  #add the era5 grid points which are close enough to each district to the crosswalk, for now
  for(district_index in 1:length(small_district_centroids$district_map_index)){
    for (era5_index in 1:length(era5_latlon$lon)){
      distance <- distHaversine(c(small_district_centroids$lon[district_index], small_district_centroids$lat[district_index]), c(era5_latlon$lon[era5_index], era5_latlon$lat[era5_index]), r=6378137)
      if (distance*0.000621371 < threshold){ #meters to miles
        new_row <- c(era5_latlon$lat[era5_index], era5_latlon$lon[era5_index], small_district_centroids$district_map_index[district_index], distance*0.000621371, 0)
        smallcrosswalk <- rbind(smallcrosswalk, new_row)
      }
    }
    
    #assign a value of 1 to a column named "closest" for the
    #closest era5 gridpoint to each district
    district_rows <- which(smallcrosswalk$district_map_index==small_district_centroids$district_map_index[district_index])
    min_district <- which.min(smallcrosswalk$distances[district_rows])
    smallcrosswalk$closest[district_rows[min_district]] <- 1
  }
  
  #only keep the closest era5 grid point to the district centroid
  #these districts are small - only one is necessary
  smallcrosswalk_mindist <- smallcrosswalk[which(smallcrosswalk$closest == 1),]
  smallcrosswalk_mindist$district_map_id <- district_list[smallcrosswalk_mindist$district_map_index] 
  smallcrosswalk_mindist <- smallcrosswalk_mindist %>% dplyr::select(-district_map_index,
                                                                     -closest, -distances)
  
  #join the crosswalk for small districts to the one for big districts
  crosswalk <- rbind(crosswalk, smallcrosswalk_mindist)
  crosswalk <- crosswalk[complete.cases(crosswalk),]
  
  district_state_names <- as.data.frame(cbind(india_shapes61$DIST61_ID, india_shapes61$STATE_UT, india_shapes61$NAME))
  names(district_state_names) <- c("district_map_id", "state_name", "district_name")
  
  crosswalk <- merge(crosswalk, district_state_names, by = "district_map_id")
  
  #CHANGE BELOW
  setwd("/Users/paulstainier/Dropbox/1_rii/2_working/input/crosswalk/district61")
  mapdistrict_icrisat_crosswalk <- read.csv("icrisat_shapefile61_crosswalk.csv")
  crosswalk <- merge(crosswalk, mapdistrict_icrisat_crosswalk, by = "district_map_id")
  
  #save the crosswalk file
  setwd("/Users/paulstainier/Dropbox/1_rii/2_working/input/crosswalk")
  write.csv(crosswalk, "era5_india61icrisat_spatial_crosswalk.csv", row.names=FALSE)
}

#nss weather
{
######################
#tmax_bins_and_districts
#create the tmax bins
#match with districts
#aggregate to year-month-district level
#####################
if(tmax_bins_and_districts){
  setwd(crosswalk_directory)
  crosswalk <- read.csv("era5_distid3_spatial_crosswalk.csv")
  crosswalk$X <- NULL
  for (year in 2002:2012){
    print(year)
    for (month in 1:12){
      print(month)
      #practice date allows use of days_in_month, which chooses appropriate number 
      #days for month-year combo
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        
        setwd(paste(input_directory, "temp", sep = "/"))
        weather_data <- read.csv(paste("tmax2m_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        
        parameters <- c(70, 100, 10, 110)
        #create the less than 0 daily bin and set value to 1 for matching values
        weather_data[,"tmax_lt70"] <- ifelse(weather_data$tmax2m < 70, 1, 0)
        for (number in seq(parameters[1], parameters[2], by=parameters[3])){
          bin_name <- paste("tmax", number, number+parameters[3], sep = "_")
          weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= number & weather_data$tmax2m < number + parameters[3], 1, 0)
        }
        
        #create other daily tmax bins
        bin_name <- paste("tmax", "_gt", parameters[4], sep = "")
        weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= parameters[4], 1, 0)
        
        #make cubic spline
        k1 = 40
        k2 = 60
        k3 = 80
        k4 = 100
        #formulas from https://www.stata.com/manuals13/rmkspline.pdf
        weather_data$tmax_sp1 <- weather_data$tmax2m
        weather_data$tmax_sp2 = (pmax((weather_data$tmax2m - k1), 0)^3  -1/(k4-k3)*(pmax((weather_data$tmax2m - k3), 0)^3*(k4-k1) - pmax((weather_data$tmax2m - k4), 0)^3*(k3-k1)))/(k4-k1)^2
        weather_data$tmax_sp3 = (pmax((weather_data$tmax2m - k2), 0)^3  -1/(k4-k3)*(pmax((weather_data$tmax2m - k3), 0)^3*(k4-k2) - pmax((weather_data$tmax2m - k4), 0)^3*(k3-k2)))/(k4-k1)^2
        
        if(day == 1){
          month_bins_data <- weather_data
        }else{
          month_bins_data <- rbind(month_bins_data, weather_data)
        }
      }
      
      #collapse to monthly data per era5 point
      month_collapse_data <- month_bins_data %>%
        group_by(lat, lon) %>% 
        summarise(year = mean(year),
                  month = mean(month),
                  tmax_value=mean(tmax2m),
                  tmax_lt70=sum(tmax_lt70),
                  tmax_70_80=sum(tmax_70_80),
                  tmax_80_90=sum(tmax_80_90),
                  tmax_90_100=sum(tmax_90_100),
                  tmax_100_110=sum(tmax_100_110),
                  tmax_gt110=sum(tmax_gt110),
                  tmax_sp1=sum(tmax_sp1),
                  tmax_sp2=sum(tmax_sp2),
                  tmax_sp3=sum(tmax_sp3))
      
      
      #merge the monthly bins with the crosswalk and collapse to the distid3 month level
      month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
      month_distid3_data <- month_collapse_data %>%
        group_by(distid3) %>%
        summarise(year = mean(year),
                  month = mean(month),
                  tmax_value=mean(tmax_value),
                  tmax_lt70=mean(tmax_lt70),
                  tmax_70_80=mean(tmax_70_80),
                  tmax_80_90=mean(tmax_80_90),
                  tmax_90_100=mean(tmax_90_100),
                  tmax_100_110=mean(tmax_100_110),
                  tmax_gt110=mean(tmax_gt110),
                  tmax_sp1=mean(tmax_sp1),
                  tmax_sp2=mean(tmax_sp2),
                  tmax_sp3=mean(tmax_sp3))
      month_distid3_data <- round_df(month_distid3_data, 3)
      if(month == 1 & year == 2002){
        year_distid3_data <- month_distid3_data
      }else{
        year_distid3_data <- rbind(year_distid3_data, month_distid3_data)
      }
    }
  }
  setwd(output_directory)
  write.csv(year_distid3_data, "era5_tmax_2002_2012_distid3_month_key.csv", row.names=FALSE)
}

######################
#prec_districts
#match precipitation with districts
#aggregate to year district level
#####################
if(prec_districts){
  setwd(crosswalk_directory)
  crosswalk <- read.csv("era5_distid3_spatial_crosswalk.csv")
  crosswalk$X <- NULL
  for (year in 1979:2012){
    print(year)
    for (month in 1:12){
      #practice date allows use of days_in_month, which chooses appropriate number 
      #days for month-year combo
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        
        setwd(paste(input_directory, "prec", sep = "/"))
        weather_data <- read.csv(paste("totalprec_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        
        if(day == 1){
          month_data <- weather_data
        }else{
          month_data <- rbind(month_data, weather_data)
        }
      }
      #collapse to total prec monthly
      month_collapse_data <- month_data %>% 
        group_by(lat, lon, year, month) %>% 
        summarise(prec=sum(prec)
        )
      #collapse to month-distid3 level
      month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
      month_distid3_data <- month_collapse_data %>%
        group_by(distid3, year, month) %>%
        summarise(prec=mean(prec)
        )
      month_distid3_data <- round_df(month_distid3_data, 3)
      if(month == 1 & year == 1979){
        year_distid3_data <- month_distid3_data
      }else{
        year_distid3_data <- rbind(year_distid3_data, month_distid3_data)
      }
    }
  }
  setwd(output_directory)
  write.csv(year_distid3_data, "era5_prec_1979_2012_distid3_month_key.csv", row.names=FALSE)
  year_distid3_data_nss_sample <- year_distid3_data[which(year_distid3_data$year >= 2002),]
  write.csv(year_distid3_data_nss_sample, "era5_prec_2002_2012_distid3_month_key.csv", row.names=FALSE)
}

######################
#tmax_variables
#create the tmax variables for the growing and non-growing season, and add them up
#last year, 2 years, 3 years, next year (placebo) bins
#####################
if(tmax_variables){
  setwd(output_directory)
  temp_data <- read.csv("era5_tmax_2002_2012_distid3_month_key.csv")
  district_list <- unique(temp_data$distid3)
  
  for(distid in district_list){
    print(distid)
    #get the data for this district and order it by yearmonth
    district_temp <- temp_data[which(temp_data$distid3 == distid),]
    district_temp <- district_temp[order(district_temp$year, district_temp$month),]
    growing_season_months <- c(6:12)
    district_temp$growing_season <- ifelse(district_temp$month %in% growing_season_months, "grow", "nogrow")
    district_temp$growing_season[which(district_temp$month %in% c(1:2))] <- "neither"
    district_temp$month <- NULL
    for(year in c(2003:2012)){
        #get the sum of data from last 3 years, and this one
        district_temp$years_index <- 0
        district_temp$years_index[which(district_temp$year == year - 1)] <- "last_y"
        district_temp$years_index[which(district_temp$year == year)] <- "this_y"
        
        #collapse by marker
        district_year_weather <- district_temp %>%
          group_by(distid3, years_index, growing_season) %>%
          summarise_all(sum)
        district_year_weather <- district_year_weather[which(district_year_weather$years_index != 0),]

        district_year_weather$year <- NULL
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")]/7
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")]/3
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")]/3
            district_year_weather <- as.data.frame(district_year_weather)
        
        district_year_weather$years_index_grow <- paste(district_year_weather$years_index, district_year_weather$growing_season, sep = "")
        district_year_weather$growing_season <- NULL
        district_year_weather$years_index <- NULL
        
        district_year_weather <- reshape(district_year_weather, idvar = "distid3", timevar = "years_index_grow", direction = "wide")
        district_year_weather$year <- year

        if(distid == 3 & year == 2003){
          final_temp_data <- district_year_weather
        }else{
          final_temp_data <- rbind(final_temp_data, district_year_weather)
        }
    }
  }
  
  for(year_name in c("last_y", "this_y")){
    for(var_name in c("lt70", "70_80", "80_90", "90_100", "100_110", "gt110", "sp1", "sp2", "sp3")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name] + final_temp_data[,nogrow_name] + final_temp_data[,neither_name])
      
    }
    for(var_name in c("value")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name]*7/12 + final_temp_data[,nogrow_name]*3/12 + final_temp_data[,neither_name]*2/12)
    }
  }
  write.csv(final_temp_data, "era5_tmax_variables_2003_2012_distid3_year_key_all.csv", row.names = F)
}

######################
#prec_total
#create the prec tota,s for the whole year
#total precipitation
#last year, 2 years, 3 years, next year (placebo) 
#####################
if(prec_totals){
  setwd(output_directory)
  rain_data <- read.csv("era5_prec_2002_2012_distid3_month_key.csv")
  #mm to meters
  rain_data$prec <- rain_data$prec/1000 
  district_list <- unique(rain_data$distid3)
  for(distid in district_list){
    print(distid)
    #get the data for this district and order it by yearmonth
    district_rain <- rain_data[which(rain_data$distid3 == distid),]
    district_rain <- district_rain[order(district_rain$year, district_rain$month),]
    growing_season_months <- c(6:12)
    district_rain$growing_season <- ifelse(district_rain$month %in% growing_season_months, "grow", "nogrow")
    district_rain$growing_season[which(district_rain$month %in% c(1:2))] <- "neither"
    district_rain$month <- NULL
    for(year in 2003:2012){
        district_rain$years_index <- 0
        district_rain$years_index[which(district_rain$year == year - 1)] <- "last_y"
        district_rain$years_index[which(district_rain$year == year)] <- "this_y"

        #collapse by marker
        district_month_rain <- district_rain %>%
          group_by(distid3, years_index, growing_season) %>%
          summarise_all(sum)
        district_month_rain <- district_month_rain[which(district_month_rain$years_index != 0),]
        
        district_month_rain$year <- NULL
        district_month_rain <- as.data.frame(district_month_rain)
        
        district_month_rain$years_index_grow <- paste(district_month_rain$years_index, district_month_rain$growing_season, sep = "")
        district_month_rain$growing_season <- NULL
        district_month_rain$years_index <- NULL
        
        district_month_rain <- reshape(district_month_rain, idvar = "distid3", timevar = "years_index_grow", direction = "wide")
        district_month_rain$year <- year

        
        if(distid == 3 & year == 2003){
          final_rain_data <- district_month_rain
        }else{
          final_rain_data <- rbind(final_rain_data, district_month_rain)
        }
        
    }
  }
  for(year_name in c("last_y", "this_y")){
    for(var_name in c("prec")){
      grow_name <- paste(var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste(var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste(var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste(var_name, ".", year_name, "all", sep= "")
      final_rain_data[,full_year_name] <- as.data.frame(final_rain_data[,grow_name] + final_rain_data[,nogrow_name] + final_rain_data[,neither_name])
    }
  }
  write.csv(final_rain_data, "era5_total_prec_2003_2012_distid3_year_key_all.csv", row.names = F)
  
}

######################
#prec_variables
#create the tmax variables for the whole year, grow and nogrow
#total precipitation
#>80%, <20%, deviation from 50%
#last year, 2 years, 3 years, next year (placebo) 
#####################
if(prec_variables){
  setwd(output_directory)
  rain_data_longterm <- read.csv("era5_prec_1979_2012_distid3_month_key.csv")
  rain_data_longterm$prec <- rain_data_longterm$prec/1000
  growing_season_months <- c(6:12)
  rain_data_longterm$growing_season <- ifelse(rain_data_longterm$month %in% growing_season_months, "grow", "nogrow")
  rain_data_longterm$growing_season[which(rain_data_longterm$month %in% c(1:2))] <- "neither"
  
  
  rain_data_longterm_distmonth <- rain_data_longterm %>% 
    group_by(distid3, year, growing_season) %>% 
    summarise(prec = sum(prec)) %>%
    ungroup()
  
  rain_data_longterm_distmonth <- as.data.frame(rain_data_longterm_distmonth)
  
  rain_data_longterm_allyear <- reshape(rain_data_longterm_distmonth, idvar = c("distid3", "year"), timevar = "growing_season", direction = "wide")
  rain_data_longterm_allyear$prec <- rain_data_longterm_allyear$prec.grow + rain_data_longterm_allyear$prec.nogrow
  rain_data_longterm_allyear <- dplyr::select(rain_data_longterm_allyear, distid3, year, prec)
  rain_data_longterm_allyear$growing_season <- "all"
  rain_data_longterm_distmonth <- rbind(rain_data_longterm_distmonth, rain_data_longterm_allyear)
  
  #create 20%, 50%, and 80% of ALL year-month combos per district 
  district_rain_percentiles <- rain_data_longterm_distmonth %>%
    group_by(distid3, growing_season) %>% 
    summarise(#"prec10%" = quantile(prec, probs = .1),
              "prec20%" = quantile(prec, probs = .2),
              #"prec33%" = quantile(prec, probs = .33),
              "prec50%" = quantile(prec, probs = .5),
              #"prec66%" = quantile(prec, probs = .66),
              "prec80%" = quantile(prec, probs = .8))
              #"prec90%" = quantile(prec, probs = .9))
  
  
  
  district_rain_percentiles <- as.data.frame(district_rain_percentiles)
  district_rain_percentiles <- reshape(district_rain_percentiles, idvar = "distid3", timevar = "growing_season", direction = "wide")
  
  rain_data <- read.csv("era5_total_prec_2003_2012_distid3_year_key_all.csv")
  rain_data <- merge(rain_data, district_rain_percentiles, by = "distid3")
  
  #use these percentile values to create the variables, dry/wet, howdry/howwet
  for(year_name in c("last_y", "this_y")){
    for(var_name in c("dry", "wet", "how_dry", "how_wet")){
      for(grow_name in c("grow", "nogrow", "all")){
        new_name <- paste(var_name, ".", year_name, grow_name, sep= "")
        prec_name <- paste("prec.", year_name, grow_name, sep= "")
        
        if(var_name == "dry"){
          condition_name <- paste("prec20%.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "wet"){
          condition_name <- paste("prec80%.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "how_dry"){
          condition_name <- paste("prec50%.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], rain_data[,condition_name] -  rain_data[, prec_name], 0) 
        }
        if(var_name == "how_wet"){
          condition_name <- paste("prec50%.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], rain_data[, prec_name]- rain_data[,condition_name] , 0) 
        }
      
      }
    }
  }
  write.csv(rain_data, "era5_prec_variables_2003_2012_distid3_year_key_all.csv", row.names = F)
}
}


#nfhs weather
{
  ######################
  #tmax_bins_and_districts
  #create the tmax bins
  #match with districts
  #aggregate to year-month-district level
  #####################
  if(tmax_bins_and_districts_nfhs){
    setwd(crosswalk_directory)
    crosswalk <- read.csv("era5_nfhs4_spatial_crosswalk.csv")
    crosswalk$X <- NULL
    for (year in 2008:2020){
      print(year)
      for (month in 1:12){
        print(month)
        #practice date allows use of days_in_month, which chooses appropriate number 
        #days for month-year combo
        practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
        for(day in 1:days_in_month(practice_date)){
          
          setwd(paste(input_directory, "temp", sep = "/"))
          weather_data <- read.csv(paste("tmax2m_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
          
          parameters <- c(50, 100, 10, 110)
          #create the less than 50 daily bin and set value to 1 for matching values
          weather_data[,"tmax_lt50"] <- ifelse(weather_data$tmax2m < 50, 1, 0)
          
          #create other daily tmax bins
          for (number in seq(parameters[1], parameters[2], by=parameters[3])){
            bin_name <- paste("tmax", number, number+parameters[3], sep = "_")
            weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= number & weather_data$tmax2m < number + parameters[3], 1, 0)
          }
          
          #create gt110 bin
          bin_name <- paste("tmax", "_gt", parameters[4], sep = "")
          weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= parameters[4], 1, 0)
          
          #create cdd (to match bannerjee)
          #they do cdd > 90, based on mean 
          #we can do cdd > 100 and 110 as well
          weather_data <- mutate(weather_data, cdd_90 = pmax(0, tmax2m - 90))
          weather_data <- mutate(weather_data, cdd_100 = pmax(0, tmax2m - 100))
          weather_data <- mutate(weather_data, cdd_110 = pmax(0, tmax2m - 110))
          
          if(day == 1){
            month_bins_data <- weather_data
          }else{
            month_bins_data <- rbind(month_bins_data, weather_data)
          }
        }
        
        #collapse to monthly data per era5 point
        month_collapse_data <- month_bins_data %>% 
          group_by(lat, lon) %>% 
          summarise(year = mean(year),
                    month = mean(month),
                    tmax_value=mean(tmax2m),
                    tmax_lt50=sum(tmax_lt50),
                    tmax_50_60=sum(tmax_50_60),
                    tmax_60_70=sum(tmax_60_70),
                    tmax_70_80=sum(tmax_70_80),
                    tmax_80_90=sum(tmax_80_90),
                    tmax_90_100=sum(tmax_90_100),
                    tmax_100_110=sum(tmax_100_110),
                    tmax_gt110=sum(tmax_gt110),
                    cdd_90=sum(cdd_90),
                    cdd_100=sum(cdd_100),
                    cdd_110=sum(cdd_110)
          )
        #merge the monthly bins with the crosswalk and collapse to the district_n4-month level
        month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
        month_district_n4_data <- month_collapse_data %>%
          group_by(district_n4) %>%
          summarise(year = mean(year),
                    month = mean(month),
                    tmax_value=mean(tmax_value),
                    tmax_lt50=mean(tmax_lt50),
                    tmax_50_60=mean(tmax_50_60),
                    tmax_60_70=mean(tmax_60_70),
                    tmax_70_80=mean(tmax_70_80),
                    tmax_80_90=mean(tmax_80_90),
                    tmax_90_100=mean(tmax_90_100),
                    tmax_100_110=mean(tmax_100_110),
                    tmax_gt110=mean(tmax_gt110),
                    cdd_90=mean(cdd_90),
                    cdd_100=mean(cdd_100),
                    cdd_110=mean(cdd_110)
                    )
        month_district_n4_data <- round_df(month_district_n4_data, 3)
        if(month == 1 & year == 2008){
          year_district_n4_data <- month_district_n4_data
        }else{
          year_district_n4_data <- rbind(year_district_n4_data, month_district_n4_data)
        }
      }
    }
    setwd(output_directory)
    write.csv(year_district_n4_data, "era5_tmax_2008_2020_district_n4_month_key.csv", row.names=FALSE)
  }
  
  ######################
  #prec_districts
  #match precipitation with districts
  #aggregate to year district level
  #####################
  if(prec_districts_nfhs){
    setwd(crosswalk_directory)
    crosswalk <- read.csv("era5_nfhs4_spatial_crosswalk.csv")
    crosswalk$X <- NULL
    for (year in 1979:2020){
      print(year)
      for (month in 1:12){
        #practice date allows use of days_in_month, which chooses appropriate number 
        #days for month-year combo
        practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
        for(day in 1:days_in_month(practice_date)){
          
          setwd(paste(input_directory, "prec", sep = "/"))
          weather_data <- read.csv(paste("totalprec_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
          
          if(day == 1){
            month_data <- weather_data
          }else{
            month_data <- rbind(month_data, weather_data)
          }
        }
        #collapse to total prec monthly
        month_collapse_data <- month_data %>% 
          group_by(lat, lon, year, month) %>% 
          summarise(prec=sum(prec)
          )
        #collapse to month-district_n4 level
        month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
        month_district_n4_data <- month_collapse_data %>%
          group_by(district_n4, year, month) %>%
          summarise(prec=mean(prec)
          )
        month_district_n4_data <- round_df(month_district_n4_data, 3)
        if(month == 1 & year == 1979){
          year_district_n4_data <- month_district_n4_data
        }else{
          year_district_n4_data <- rbind(year_district_n4_data, month_district_n4_data)
        }
      }
    }
    setwd(output_directory)
    write.csv(year_district_n4_data, "era5_prec_1979_2020_district_n4_month_key.csv", row.names=FALSE)
    year_district_n4_data_nss_sample <- year_district_n4_data[which(year_district_n4_data$year >= 2008),]
    write.csv(year_district_n4_data_nss_sample, "era5_prec_2008_2020_district_n4_month_key.csv", row.names=FALSE)
  }
  
  ######################
  #tmax_variables
  #create the tmax variables for the growing and non-growing season, and add them up
  #last year, 2 years, 3 years, next year (placebo) bins
  #####################
  if(tmax_variables_nfhs){
    setwd(output_directory)
    temp_data <- read.csv("era5_tmax_2008_2020_district_n4_month_key.csv")
    district_list <- unique(temp_data$district_n4)
    
    for(district_n4 in district_list){
      print(district_n4)
      #get the data for this district and order it by yearmonth
      district_temp <- temp_data[which(temp_data$district_n4 == district_n4),]
      district_temp <- district_temp[order(district_temp$year, district_temp$month),]
      growing_season_months <- c(6:12)
      district_temp$growing_season <- ifelse(district_temp$month %in% growing_season_months, "grow", "nogrow")
      district_temp$growing_season[which(district_temp$month %in% c(1:2))] <- "neither"
      district_temp$month <- NULL
      for(year in c(2009:2020)){
        #get the sum of data from last 3 years, and this one
        district_temp$years_index <- 0
        district_temp$years_index[which(district_temp$year == year - 1)] <- "last_y"
        district_temp$years_index[which(district_temp$year == year)] <- "this_y"
        
        #collapse by marker
        district_year_weather <- district_temp %>%
          group_by(district_n4, years_index, growing_season) %>%
          summarise_all(sum)
        district_year_weather <- district_year_weather[which(district_year_weather$years_index != 0),]
        
        district_year_weather$year <- NULL
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")]/7
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")]/3
        district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")]/3
        district_year_weather <- as.data.frame(district_year_weather)
        
        district_year_weather$years_index_grow <- paste(district_year_weather$years_index, district_year_weather$growing_season, sep = "")
        district_year_weather$growing_season <- NULL
        district_year_weather$years_index <- NULL
        
        district_year_weather <- reshape(district_year_weather, idvar = "district_n4", timevar = "years_index_grow", direction = "wide")
        district_year_weather$year <- year
        
        if(district_n4 == 1 & year == 2009){
          final_temp_data <- district_year_weather
        }else{
          final_temp_data <- rbind(final_temp_data, district_year_weather)
        }
      }
    }
    
    for(year_name in c("last_y", "this_y")){
      for(var_name in c("lt50", "50_60", "60_70", "70_80", "80_90", "90_100", "100_110", "gt110")){
        grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
        nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
        neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
        full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
        final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name] + final_temp_data[,nogrow_name] +  final_temp_data[,neither_name])
        
      }
      for(var_name in c("value")){
        grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
        nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
        neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
        full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
        final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name]*7/12 + final_temp_data[,nogrow_name]*3/12 + final_temp_data[,neither_name]*2/12)
      }
    }
    write.csv(final_temp_data, "era5_tmax_variables_2009_2020_district_n4_year_key_all.csv", row.names = F)
  }
  
  ######################
  #prec_total
  #create the prec tota,s for the whole year
  #total precipitation
  #last year, 2 years, 3 years, next year (placebo) 
  #####################
  if(prec_totals_nfhs){
    setwd(output_directory)
    rain_data <- read.csv("era5_prec_2008_2020_district_n4_month_key.csv")
    #mm to meters
    rain_data$prec <- rain_data$prec/1000 
    district_list <- unique(rain_data$district_n4)
    for(district_n4 in district_list){
      print(district_n4)
      #get the data for this district and order it by yearmonth
      district_rain <- rain_data[which(rain_data$district_n4 == district_n4),]
      district_rain <- district_rain[order(district_rain$year, district_rain$month),]
      growing_season_months <- c(6:12)
      district_rain$growing_season <- ifelse(district_rain$month %in% growing_season_months, "grow", "nogrow")
      district_rain$growing_season[which(district_rain$month %in% c(1:2))] <- "neither"
      district_rain$month <- NULL
      for(year in 2009:2020){
        district_rain$years_index <- 0
        district_rain$years_index[which(district_rain$year == year - 1)] <- "last_y"
        district_rain$years_index[which(district_rain$year == year)] <- "this_y"
        
        #collapse by marker
        district_month_rain <- district_rain %>%
          group_by(district_n4, years_index, growing_season) %>%
          summarise_all(sum)
        district_month_rain <- district_month_rain[which(district_month_rain$years_index != 0),]
        
        district_month_rain$year <- NULL
        district_month_rain <- as.data.frame(district_month_rain)
        
        district_month_rain$years_index_grow <- paste(district_month_rain$years_index, district_month_rain$growing_season, sep = "")
        district_month_rain$growing_season <- NULL
        district_month_rain$years_index <- NULL
        
        district_month_rain <- reshape(district_month_rain, idvar = "district_n4", timevar = "years_index_grow", direction = "wide")
        district_month_rain$year <- year
        
        
        if(district_n4 == 1 & year == 2009){
          final_rain_data <- district_month_rain
        }else{
          final_rain_data <- rbind(final_rain_data, district_month_rain)
        }
        
      }
    }
    for(year_name in c("last_y", "this_y")){
      for(var_name in c("prec")){
        grow_name <- paste(var_name, ".", year_name, "grow", sep= "")
        nogrow_name <- paste(var_name, ".", year_name, "nogrow", sep= "")
        neither_name <- paste(var_name, ".", year_name, "neither", sep= "")
        full_year_name <- paste(var_name, ".", year_name, "all", sep= "")
        final_rain_data[,full_year_name] <- as.data.frame(final_rain_data[,grow_name] + final_rain_data[,nogrow_name] + final_rain_data[,neither_name])
      }
    }
    write.csv(final_rain_data, "era5_total_prec_2009_2020_district_n4_year_key_all.csv", row.names = F)
    
  }
  
  ######################
  #prec_variables
  #create the tmax variables for the whole year, grow and nogrow
  #total precipitation
  #>80%, <20%, deviation from 50%
  #last year, 2 years, 3 years, next year (placebo) 
  #####################
  if(prec_variables_nfhs){
    setwd(output_directory)
    rain_data_longterm <- read.csv("era5_prec_1979_2020_district_n4_month_key.csv")
    rain_data_longterm$prec <- rain_data_longterm$prec/1000
    growing_season_months <- c(6:12)
    rain_data_longterm$growing_season <- ifelse(rain_data_longterm$month %in% growing_season_months, "grow", "nogrow")
    rain_data_longterm$growing_season[which(rain_data_longterm$month %in% c(1:2))] <- "neither"
    
    
    rain_data_longterm_distmonth <- rain_data_longterm %>% 
      group_by(district_n4, year, growing_season) %>% 
      summarise(prec = sum(prec)) %>%
      ungroup()
    
    rain_data_longterm_distmonth <- as.data.frame(rain_data_longterm_distmonth)
    
    rain_data_longterm_allyear <- reshape(rain_data_longterm_distmonth, idvar = c("district_n4", "year"), timevar = "growing_season", direction = "wide")
    rain_data_longterm_allyear$prec <- rain_data_longterm_allyear$prec.grow + rain_data_longterm_allyear$prec.nogrow
    rain_data_longterm_allyear <- dplyr::select(rain_data_longterm_allyear, district_n4, year, prec)
    rain_data_longterm_allyear$growing_season <- "all"
    rain_data_longterm_distmonth <- rbind(rain_data_longterm_distmonth, rain_data_longterm_allyear)
    
    #create 20%, 50%, and 80% of ALL year-month combos per district 
    district_rain_percentiles <- rain_data_longterm_distmonth %>%
      group_by(district_n4, growing_season) %>% 
      summarise(#"prec10%" = quantile(prec, probs = .1),
        "prec20%" = quantile(prec, probs = .2),
        #"prec33%" = quantile(prec, probs = .33),
        "prec50%" = quantile(prec, probs = .5),
        #"prec66%" = quantile(prec, probs = .66),
        "prec80%" = quantile(prec, probs = .8))
    #"prec90%" = quantile(prec, probs = .9))
    
    
    
    district_rain_percentiles <- as.data.frame(district_rain_percentiles)
    district_rain_percentiles <- reshape(district_rain_percentiles, idvar = "district_n4", timevar = "growing_season", direction = "wide")
    
    rain_data <- read.csv("era5_total_prec_2009_2020_district_n4_year_key_all.csv")
    rain_data <- merge(rain_data, district_rain_percentiles, by = "district_n4")
    
    #use these percentile values to create the variables, dry/wet, howdry/howwet
    for(year_name in c("last_y", "this_y")){
      for(var_name in c("dry", "wet", "how_dry", "how_wet")){
        for(grow_name in c("grow", "nogrow", "all")){
          new_name <- paste(var_name, ".", year_name, grow_name, sep= "")
          prec_name <- paste("prec.", year_name, grow_name, sep= "")
          
          if(var_name == "dry"){
            condition_name <- paste("prec20%.",grow_name, sep= "")
            rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], 1, 0) 
          }
          if(var_name == "wet"){
            condition_name <- paste("prec80%.",grow_name, sep= "")
            rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], 1, 0) 
          }
          if(var_name == "how_dry"){
            condition_name <- paste("prec50%.",grow_name, sep= "")
            rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], rain_data[,condition_name] -  rain_data[, prec_name], 0) 
          }
          if(var_name == "how_wet"){
            condition_name <- paste("prec50%.",grow_name, sep= "")
            rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], rain_data[, prec_name]- rain_data[,condition_name] , 0) 
          }
          
        }
      }
    }
    write.csv(rain_data, "era5_prec_variables_2009_2020_district_n4_year_key_all.csv", row.names = F)
  }
}


#icrisat_weather
{

######################
#tmax_bins_and_districts
#create the tmax bins
#match with districts
#aggregate to year-month-district level
#####################
if(tmax_bins_and_districts_icrisat){
  setwd(crosswalk_directory)
  crosswalk <- read.csv("era5_india61icrisat_spatial_crosswalk.csv")
  crosswalk$X <- NULL
  for (year in 1980:2013){
    print(year)
    for (month in 1:12){
      print(month)
      #practice date allows use of days_in_month, which chooses appropriate number 
      #days for month-year combo
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        
        setwd(paste(input_directory, "temp", sep = "/"))
        weather_data <- read.csv(paste("tmax2m_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        
        parameters <- c(50, 100, 10, 110)
        #create the less than 0 daily bin and set value to 1 for matching values
        weather_data[,"tmax_lt50"] <- ifelse(weather_data$tmax2m < 50, 1, 0)
        for (number in seq(parameters[1], parameters[2], by=parameters[3])){
          bin_name <- paste("tmax", number, number+parameters[3], sep = "_")
          weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= number & weather_data$tmax2m < number + parameters[3], 1, 0)
        }
        
        #create other daily tmax bins
        bin_name <- paste("tmax", "_gt", parameters[4], sep = "")
        weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= parameters[4], 1, 0)
        if(day == 1){
          month_bins_data <- weather_data
        }else{
          month_bins_data <- rbind(month_bins_data, weather_data)
        }
      }
      #collapse to monthly data per era5 point
      month_collapse_data <- month_bins_data %>% 
        group_by(lat, lon, year, month) %>% 
        summarise(tmax_value=mean(tmax2m),
                  tmax_lt50=sum(tmax_lt50),
                  tmax_50_60=sum(tmax_50_60),
                  tmax_60_70=sum(tmax_60_70),
                  tmax_70_80=sum(tmax_70_80),
                  tmax_80_90=sum(tmax_80_90),
                  tmax_90_100=sum(tmax_90_100),
                  tmax_100_110=sum(tmax_100_110),
                  tmax_gt110=sum(tmax_gt110)
        )
      #merge the monthly bins with the crosswalk and collapse to the distid3 month level
      month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
      month_icrisat_district_data <- month_collapse_data %>%
        group_by(icrisat_district, year, month) %>%
        summarise(tmax_value=mean(tmax_value),
                  tmax_lt50=mean(tmax_lt50),
                  tmax_50_60=mean(tmax_50_60),
                  tmax_60_70=mean(tmax_60_70),
                  tmax_70_80=mean(tmax_70_80),
                  tmax_80_90=mean(tmax_80_90),
                  tmax_90_100=mean(tmax_90_100),
                  tmax_100_110=mean(tmax_100_110),
                  tmax_gt110=mean(tmax_gt110))
      month_icrisat_district_data <- round_df(month_icrisat_district_data, 3)
      if(month == 1 & year == 1980){
        year_icrisat_district_data <- month_icrisat_district_data
      }else{
        year_icrisat_district_data <- rbind(year_icrisat_district_data, month_icrisat_district_data)
      }
    }
  }
  setwd(output_directory)
  write.csv(year_icrisat_district_data, "era5_tmax_1980_2013_icrisat_district_month_key.csv", row.names=FALSE)
}

######################
#prec_districts
#match precipitation with districts
#aggregate to year district level
#####################
if(prec_districts_icrisat){
  setwd(crosswalk_directory)
  crosswalk <- read.csv("era5_india61icrisat_spatial_crosswalk.csv")
  crosswalk$X <- NULL
  for (year in 1979:2013){
    print(year)
    for (month in 1:12){
      #practice date allows use of days_in_month, which chooses appropriate number 
      #days for month-year combo
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        
        setwd(paste(input_directory, "prec", sep = "/"))
        weather_data <- read.csv(paste("totalprec_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        
        if(day == 1){
          month_data <- weather_data
        }else{
          month_data <- rbind(month_data, weather_data)
        }
      }
      #collapse to total prec monthly
      month_collapse_data <- month_data %>% 
        group_by(lat, lon, year, month) %>% 
        summarise(prec=sum(prec)
        )
      #collapse to month-distid3 level
      month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
      month_icrisat_district_data <- month_collapse_data %>%
        group_by(icrisat_district, year, month) %>%
        summarise(prec=mean(prec)
        )
      month_icrisat_district_data <- round_df(month_icrisat_district_data, 3)
      if(month == 1 & year == 1979){
        year_icrisat_district_data <- month_icrisat_district_data
      }else{
        year_icrisat_district_data <- rbind(year_icrisat_district_data, month_icrisat_district_data)
      }
    }
  }
  setwd(output_directory)
  write.csv(year_icrisat_district_data, "era5_prec_1979_2013_icrisat_district_month_key.csv", row.names=FALSE)
}
  
  
######################
#tmax_variables_icrisat
#create the tmax variables for the growing and non-growing season, and add them up
#last year, 2 years, 3 years, next year (placebo) bins
#####################
if(tmax_variables_icrisat){
  setwd(output_directory)
  temp_data <- read.csv("era5_tmax_1980_2013_icrisat_district_month_key.csv")
  district_list <- unique(temp_data$icrisat_district)
  
  for(distid in district_list){
    print(distid)
    #get the data for this district and order it by yearmonth
    district_temp <- temp_data[which(temp_data$icrisat_district == distid),]
    district_temp <- district_temp[order(district_temp$year, district_temp$month),]
    growing_season_months <- c(6:12)
    district_temp$growing_season <- ifelse(district_temp$month %in% growing_season_months, "grow", "nogrow")
    district_temp$growing_season[which(district_temp$month %in% c(1:2))] <- "neither"
    district_temp$month <- NULL
    for(year in c(1980:2012)){
      #get the sum of data from last 3 years, and this one
      district_temp$years_index <- 0
      district_temp$years_index[which(district_temp$year == year)] <- "this_y"
      
      #collapse by marker
      district_year_weather <- district_temp %>%
        group_by(icrisat_district, years_index, growing_season) %>%
        summarise_all(sum)
      district_year_weather <- district_year_weather[which(district_year_weather$years_index != 0),]
      
      district_year_weather$year <- NULL
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")]/7
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")]/3
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")]/2
      district_year_weather <- as.data.frame(district_year_weather)
      
      district_year_weather$years_index_grow <- paste(district_year_weather$years_index, district_year_weather$growing_season, sep = "")
      district_year_weather$growing_season <- NULL
      district_year_weather$years_index <- NULL
      
      district_year_weather <- reshape(district_year_weather, idvar = "icrisat_district", timevar = "years_index_grow", direction = "wide")
      district_year_weather$year <- year
      
      if(distid == 1 & year == 1980){
        final_temp_data <- district_year_weather
      }else{
        final_temp_data <- rbind(final_temp_data, district_year_weather)
      }
    }
  }
  
  for(year_name in c("this_y")){
    for(var_name in c("lt50", "50_60", "60_70", "70_80",
                      "80_90", "90_100", "100_110", "gt110")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name] + final_temp_data[,nogrow_name] +  final_temp_data[,neither_name])
      #final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name] + final_temp_data[,nogrow_name] )
      
    }
    for(var_name in c("value")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name]*7/12 + final_temp_data[,nogrow_name]*3/12 + final_temp_data[,neither_name]*2/12)
      #final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name]*7/12 + final_temp_data[,nogrow_name]*5/12)
    }
  }
  
  write.csv(final_temp_data, "era5_tmax_variables_1980_2012_icrisat_district_year_key_all.csv", row.names = F)
}
  
######################
#prec_total
#create the prec tota,s for the whole year
#total precipitation
#last year, 2 years, 3 years, next year (placebo) 
#####################
if(prec_totals_icrisat){
  setwd(output_directory)
  rain_data <- read.csv("era5_prec_1979_2013_icrisat_district_month_key.csv")
  #mm to meters
  rain_data$prec <- rain_data$prec/1000 
  district_list <- unique(rain_data$icrisat_district)
  for(distid in district_list){
    print(distid)
    #get the data for this district and order it by yearmonth
    district_rain <- rain_data[which(rain_data$icrisat_district == distid),]
    district_rain <- district_rain[order(district_rain$year, district_rain$month),]
    growing_season_months <- c(6:12)
    district_rain$growing_season <- ifelse(district_rain$month %in% growing_season_months, "grow", "nogrow")
    district_rain$growing_season[which(district_rain$month %in% c(1:2))] <- "neither"
    district_rain$month <- NULL
    for(year in 1980:2012){
      district_rain$years_index <- 0
      district_rain$years_index[which(district_rain$year == year)] <- "this_y"

      #collapse by marker
      district_month_rain <- district_rain %>%
        group_by(icrisat_district, years_index, growing_season) %>%
        summarise_all(sum)
      district_month_rain <- district_month_rain[which(district_month_rain$years_index != 0),]
      
      district_month_rain$year <- NULL
      district_month_rain <- as.data.frame(district_month_rain)
      
      district_month_rain$years_index_grow <- paste(district_month_rain$years_index, district_month_rain$growing_season, sep = "")
      district_month_rain$growing_season <- NULL
      district_month_rain$years_index <- NULL
      
      district_month_rain <- reshape(district_month_rain, idvar = "icrisat_district", timevar = "years_index_grow", direction = "wide")
      district_month_rain$year <- year
      
      
      if(distid == 1 & year == 1980){
        final_rain_data <- district_month_rain
      }else{
        final_rain_data <- rbind(final_rain_data, district_month_rain)
      }
      
    }
  }
  for(year_name in c("this_y")){
    for(var_name in c("prec")){
      grow_name <- paste(var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste(var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste(var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste(var_name, ".", year_name, "all", sep= "")
      final_rain_data[,full_year_name] <- as.data.frame(final_rain_data[,grow_name] + final_rain_data[,nogrow_name] + final_rain_data[,neither_name])
      #final_rain_data[,full_year_name] <- as.data.frame(final_rain_data[,grow_name] + final_rain_data[,nogrow_name])
    }
  }
  write.csv(final_rain_data, "era5_total_prec_1980_2012_icrisat_district_year_key_all.csv", row.names = F)
  
}

######################
#prec_variables
#create the tmax variables for the whole year, grow and nogrow
#total precipitation
#>80%, <20%, deviation from 50%
#last year, 2 years, 3 years, next year (placebo) 
#####################
if(prec_variables_icrisat){
  setwd(output_directory)
  rain_data_longterm <- read.csv("era5_prec_1979_2013_icrisat_district_month_key.csv")
  rain_data_longterm$prec <- rain_data_longterm$prec/ 1000
  rain_data_longterm <- rain_data_longterm[which(rain_data_longterm$year < 2013),]
  growing_season_months <- c(6:12)
  rain_data_longterm$growing_season <- ifelse(rain_data_longterm$month %in% growing_season_months, "grow", "nogrow")
  rain_data_longterm$growing_season[which(rain_data_longterm$month %in% c(1:2))] <- "neither"
  
  
  rain_data_longterm_distmonth <- rain_data_longterm %>% 
    group_by(icrisat_district, year, growing_season) %>% 
    summarise(prec = sum(prec)) %>%
    ungroup()
  
  rain_data_longterm_distmonth <- as.data.frame(rain_data_longterm_distmonth)
  
  rain_data_longterm_allyear <- reshape(rain_data_longterm_distmonth, idvar = c("icrisat_district", "year"), timevar = "growing_season", direction = "wide")
  rain_data_longterm_allyear$prec <- rain_data_longterm_allyear$prec.grow + rain_data_longterm_allyear$prec.nogrow
  rain_data_longterm_allyear <- rain_data_longterm_allyear %>% dplyr::select(icrisat_district, year, prec)
  rain_data_longterm_allyear$growing_season <- "all"
  rain_data_longterm_distmonth <- rbind(rain_data_longterm_distmonth, rain_data_longterm_allyear)
  
  #create a function that captures percentiles
  p <- c(0.1, 0.2, 0.33, 0.5, 0.66, 0.8, 0.9)
  p_names <- map_chr(p, ~paste0("prec",.x*100, "p"))
  p_funs <- map(p, ~partial(quantile, probs = .x, na.rm = TRUE)) %>% 
    set_names(nm = p_names)
  
  #create 20%, 50%, and 80% of ALL year-month combos per district 
  district_rain_percentiles <- rain_data_longterm_distmonth %>%
    group_by(icrisat_district, growing_season) %>% 
    summarize_at(vars(prec), funs(!!!p_funs)) %>%
    ungroup()
  
  district_rain_percentiles <- as.data.frame(district_rain_percentiles)
  district_rain_percentiles <- reshape(district_rain_percentiles, idvar = "icrisat_district", timevar = "growing_season", direction = "wide")
  
  rain_data <- read.csv("era5_total_prec_1980_2012_icrisat_district_year_key_all.csv")
  rain_data <- merge(rain_data, district_rain_percentiles, by = "icrisat_district")
  
  #use these percentile values to create the variables, dry/wet, howdry/howwet
  for(year_name in c("this_y")){
    for(var_name in c("dry", "wet", "how_dry", "how_wet", "vdry", "vwet", "kdry", "kwet")){
      for(grow_name in c("grow", "nogrow", "all")){
        new_name <- paste(var_name, ".", year_name, grow_name, sep= "")
        prec_name <- paste("prec.", year_name, grow_name, sep= "")
        
        if(var_name == "dry"){
          condition_name <- paste("prec20p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "wet"){
          condition_name <- paste("prec80p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "how_dry"){
          condition_name <- paste("prec50p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], rain_data[,condition_name] -  rain_data[, prec_name], 0) 
        }
        if(var_name == "how_wet"){
          condition_name <- paste("prec50p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], rain_data[, prec_name]- rain_data[,condition_name] , 0) 
        }
        if(var_name == "vdry"){
          condition_name <- paste("prec10p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "vwet"){
          condition_name <- paste("prec90p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "kdry"){
          condition_name <- paste("prec33p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] < rain_data[,condition_name], 1, 0) 
        }
        if(var_name == "kwet"){
          condition_name <- paste("prec66p.",grow_name, sep= "")
          rain_data[, new_name] <- ifelse(rain_data[, prec_name] > rain_data[,condition_name], 1, 0) 
        }
      }
    }
  }
  rain_data <- rain_data %>% dplyr::select(-prec10p.all, -prec20p.all,         
 -prec33p.all,-prec50p.all,-prec66p.all,-prec80p.all, 
 -prec90p.all,-prec10p.grow,-prec20p.grow,-prec33p.grow, -prec50p.grow,-prec66p.grow,-prec80p.grow,-prec90p.grow,        
 -prec10p.neither,-prec20p.neither,-prec33p.neither,-prec50p.neither,     
   -prec66p.neither,-prec80p.neither,-prec90p.neither,-prec10p.nogrow,
 -prec20p.nogrow,-prec33p.nogrow,-prec50p.nogrow,-prec66p.nogrow, -prec80p.nogrow,-prec90p.nogrow)
  write.csv(rain_data, "era5_prec_variables_1980_2012_icrisat_district_year_key_all.csv", row.names = F)
}

}

#nss weather extension 
#2015-2024
{
######################
#tmax_bins_and_districts_2014_2024
#create the tmax bins
#match with districts
#aggregate to year-month-district level
#####################
if(tmax_bins_and_districts_2014_2024){
  setwd(crosswalk_directory)
  crosswalk <- read.csv("era5_distid3_spatial_crosswalk.csv")
  crosswalk$X <- NULL
  for (year in 2014:2024){
    print(year)
    for (month in 1:12){
      print(month)
      #practice date allows use of days_in_month, which chooses appropriate number 
      #days for month-year combo
      practice_date <- as.Date(paste(year, month, 1, sep = "-"), "%Y-%m-%d")
      for(day in 1:days_in_month(practice_date)){
        
        setwd(paste(input_directory, "temp", sep = "/"))
        weather_data <- read.csv(paste("tmax2m_era5_india", year, "_", month, "_", day, ".csv", sep = ""))
        
        parameters <- c(70, 100, 10, 110)
        #create the less than 0 daily bin and set value to 1 for matching values
        weather_data[,"tmax_lt70"] <- ifelse(weather_data$tmax2m < 70, 1, 0)
        for (number in seq(parameters[1], parameters[2], by=parameters[3])){
          bin_name <- paste("tmax", number, number+parameters[3], sep = "_")
          weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= number & weather_data$tmax2m < number + parameters[3], 1, 0)
        }
        
        #create other daily tmax bins
        bin_name <- paste("tmax", "_gt", parameters[4], sep = "")
        weather_data[,bin_name] <- ifelse(weather_data$tmax2m >= parameters[4], 1, 0)
        
        #make cubic spline
        k1 = 40
        k2 = 60
        k3 = 80
        k4 = 100
        #formulas from https://www.stata.com/manuals13/rmkspline.pdf
        weather_data$tmax_sp1 <- weather_data$tmax2m
        weather_data$tmax_sp2 = (pmax((weather_data$tmax2m - k1), 0)^3  -1/(k4-k3)*(pmax((weather_data$tmax2m - k3), 0)^3*(k4-k1) - pmax((weather_data$tmax2m - k4), 0)^3*(k3-k1)))/(k4-k1)^2
        weather_data$tmax_sp3 = (pmax((weather_data$tmax2m - k2), 0)^3  -1/(k4-k3)*(pmax((weather_data$tmax2m - k3), 0)^3*(k4-k2) - pmax((weather_data$tmax2m - k4), 0)^3*(k3-k2)))/(k4-k1)^2
        
        if(day == 1){
          month_bins_data <- weather_data
        }else{
          month_bins_data <- rbind(month_bins_data, weather_data)
        }
      }
      
      #collapse to monthly data per era5 point
      month_collapse_data <- month_bins_data %>%
        group_by(lat, lon) %>% 
        summarise(year = mean(year),
                  month = mean(month),
                  tmax_value=mean(tmax2m),
                  tmax_lt70=sum(tmax_lt70),
                  tmax_70_80=sum(tmax_70_80),
                  tmax_80_90=sum(tmax_80_90),
                  tmax_90_100=sum(tmax_90_100),
                  tmax_100_110=sum(tmax_100_110),
                  tmax_gt110=sum(tmax_gt110),
                  tmax_sp1=sum(tmax_sp1),
                  tmax_sp2=sum(tmax_sp2),
                  tmax_sp3=sum(tmax_sp3))
      
      
      #merge the monthly bins with the crosswalk and collapse to the distid3 month level
      month_collapse_data <- merge(month_collapse_data, crosswalk, by = c("lat", "lon"))
      month_distid3_data <- month_collapse_data %>%
        group_by(distid3) %>%
        summarise(year = mean(year),
                  month = mean(month),
                  tmax_value=mean(tmax_value),
                  tmax_lt70=mean(tmax_lt70),
                  tmax_70_80=mean(tmax_70_80),
                  tmax_80_90=mean(tmax_80_90),
                  tmax_90_100=mean(tmax_90_100),
                  tmax_100_110=mean(tmax_100_110),
                  tmax_gt110=mean(tmax_gt110),
                  tmax_sp1=mean(tmax_sp1),
                  tmax_sp2=mean(tmax_sp2),
                  tmax_sp3=mean(tmax_sp3))
      month_distid3_data <- round_df(month_distid3_data, 3)
      if(month == 1 & year == 2014){
        year_distid3_data <- month_distid3_data
      }else{
        year_distid3_data <- rbind(year_distid3_data, month_distid3_data)
      }
    }
  }
  setwd(output_directory)
  write.csv(year_distid3_data, "era5_tmax_2014_2024_distid3_month_key.csv", row.names=FALSE)
}


######################
#tmax_variables
#create the tmax variables for the growing and non-growing season, and add them up
#last year, 2 years, 3 years, next year (placebo) bins
#####################
if(tmax_variables_2015_2024){
  setwd(output_directory)
  temp_data <- read.csv("era5_tmax_2014_2024_distid3_month_key.csv")
  district_list <- unique(temp_data$distid3)
  
  for(distid in district_list){
    print(distid)
    #get the data for this district and order it by yearmonth
    district_temp <- temp_data[which(temp_data$distid3 == distid),]
    district_temp <- district_temp[order(district_temp$year, district_temp$month),]
    growing_season_months <- c(6:12)
    district_temp$growing_season <- ifelse(district_temp$month %in% growing_season_months, "grow", "nogrow")
    district_temp$growing_season[which(district_temp$month %in% c(1:2))] <- "neither"
    district_temp$month <- NULL
    for(year in c(2015:2024)){
      #get the sum of data from last 3 years, and this one
      district_temp$years_index <- 0
      district_temp$years_index[which(district_temp$year == year - 1)] <- "last_y"
      district_temp$years_index[which(district_temp$year == year)] <- "this_y"
      
      #collapse by marker
      district_year_weather <- district_temp %>%
        group_by(distid3, years_index, growing_season) %>%
        summarise_all(sum)
      district_year_weather <- district_year_weather[which(district_year_weather$years_index != 0),]
      
      district_year_weather$year <- NULL
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "grow")]/7
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "nogrow")]/3
      district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")] <- district_year_weather$tmax_value[which(district_year_weather$growing_season == "neither")]/3
      district_year_weather <- as.data.frame(district_year_weather)
      
      district_year_weather$years_index_grow <- paste(district_year_weather$years_index, district_year_weather$growing_season, sep = "")
      district_year_weather$growing_season <- NULL
      district_year_weather$years_index <- NULL
      
      district_year_weather <- reshape(district_year_weather, idvar = "distid3", timevar = "years_index_grow", direction = "wide")
      district_year_weather$year <- year
      
      if(distid == 3 & year == 2015){
        final_temp_data <- district_year_weather
      }else{
        final_temp_data <- rbind(final_temp_data, district_year_weather)
      }
    }
  }
  
  for(year_name in c("last_y", "this_y")){
    for(var_name in c("lt70", "70_80", "80_90", "90_100", "100_110", "gt110", "sp1", "sp2", "sp3")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name] + final_temp_data[,nogrow_name] + final_temp_data[,neither_name])
      
    }
    for(var_name in c("value")){
      grow_name <- paste("tmax_", var_name, ".", year_name, "grow", sep= "")
      nogrow_name <- paste("tmax_", var_name, ".", year_name, "nogrow", sep= "")
      neither_name <- paste("tmax_", var_name, ".", year_name, "neither", sep= "")
      full_year_name <- paste("tmax_", var_name, ".", year_name, "all", sep= "")
      final_temp_data[,full_year_name] <- as.data.frame(final_temp_data[,grow_name]*7/12 + final_temp_data[,nogrow_name]*3/12 + final_temp_data[,neither_name]*2/12)
    }
  }
  write.csv(final_temp_data, "era5_tmax_variables_2015_2024_distid3_year_key_all.csv", row.names = F)
}
}
